/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * CheckScheme.java
 * Copyright (C) 2006-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.StringTokenizer;
import java.util.Vector;

/**
 * Abstract general class for testing schemes in Weka. Derived classes are also
 * used for JUnit tests.
 *
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision$
 * @see TestInstances
 */
public abstract class CheckScheme extends Check {

    /**
     * Hook object for post-processing generated test data. This base
     * implementation is an identity transform; subclasses override
     * {@link #process(Instances)} to alter the data.
     */
    public static class PostProcessor {

        /**
         * Post-processes the given dataset. The default implementation hands
         * back the very same object without touching it.
         *
         * @param data the data to process
         * @return the processed data (here: the unmodified argument)
         */
        public Instances process(Instances data) {
            // identity: nothing to do in the base class
            return data;
        }
    }

    /** The number of instances in the datasets (default 20). */
    protected int m_NumInstances = 20;

    /** The number of nominal attributes (default 2). */
    protected int m_NumNominal = 2;

    /** The number of numeric attributes (default 1). */
    protected int m_NumNumeric = 1;

    /** The number of string attributes (default 1). */
    protected int m_NumString = 1;

    /** The number of date attributes (default 1). */
    protected int m_NumDate = 1;

    /** The number of relational attributes (default 1). */
    protected int m_NumRelational = 1;

    /**
     * The number of instances in relational attributes (applies also for bags
     * in multi-instance); default 10.
     */
    protected int m_NumInstancesRelational = 10;

    /**
     * The words used for assembling strings (String attributes/classes);
     * initialised from {@link TestInstances#DEFAULT_WORDS}.
     */
    protected String[] m_Words = TestInstances.DEFAULT_WORDS;

    /**
     * The separator characters used for assembling strings (String
     * attributes/classes); initialised from
     * {@link TestInstances#DEFAULT_SEPARATORS}.
     */
    protected String m_WordSeparators = TestInstances.DEFAULT_SEPARATORS;

    /** Optional hook for post-processing the data even further; may be null. */
    protected PostProcessor m_PostProcessor = null;

    /**
     * Whether classpath problems occurred. Nothing in this class sets the
     * flag; presumably derived classes do -- TODO confirm.
     */
    protected boolean m_ClasspathProblems = false;

    /**
     * Lists the command-line options understood by this scheme checker: all
     * options of the superclass followed by the data-generation options
     * declared here.
     *
     * @return an enumeration of all the available options.
     */
    public Enumeration<Option> listOptions() {
        Vector<Option> opts = new Vector<Option>();

        // inherited options come first
        opts.addAll(Collections.list(super.listOptions()));

        opts.add(new Option("\tThe number of instances in the datasets (default 20).", "N", 1, "-N <num>"));
        opts.add(new Option("\tThe number of nominal attributes (default 2).", "nominal", 1, "-nominal <num>"));
        // NOTE(review): this option is advertised here, but setOptions() in this
        // class never reads "-nominal-values" -- confirm whether it is handled
        // elsewhere.
        opts.add(new Option("\tThe number of values for nominal attributes (default 1).", "nominal-values", 1, "-nominal-values <num>"));
        opts.add(new Option("\tThe number of numeric attributes (default 1).", "numeric", 1, "-numeric <num>"));
        opts.add(new Option("\tThe number of string attributes (default 1).", "string", 1, "-string <num>"));
        opts.add(new Option("\tThe number of date attributes (default 1).", "date", 1, "-date <num>"));
        opts.add(new Option("\tThe number of relational attributes (default 1).", "relational", 1, "-relational <num>"));
        opts.add(new Option("\tThe number of instances in relational/bag attributes (default 10).", "num-instances-relational", 1, "-num-instances-relational <num>"));
        opts.add(new Option("\tThe words to use in string attributes.", "words", 1, "-words <comma-separated-list>"));
        opts.add(new Option("\tThe word separators to use in string attributes.", "word-separators", 1, "-word-separators <chars>"));

        return opts.elements();
    }

    /**
     * Configures this checker from a command-line style option array. The
     * superclass parses its own options first; afterwards every option
     * declared in this class is read, and any option that is absent (or has an
     * empty value) is reset to its default.
     *
     * @param options the list of options as an array of strings
     * @throws Exception if the superclass rejects the options, a numeric
     *                   option value cannot be parsed as an integer, or fewer
     *                   than two words are supplied via -words
     */
    public void setOptions(String[] options) throws Exception {
        super.setOptions(options);

        String numInstances = Utils.getOption('N', options);
        setNumInstances(numInstances.length() == 0 ? 20 : Integer.parseInt(numInstances));

        String numNominal = Utils.getOption("nominal", options);
        setNumNominal(numNominal.length() == 0 ? 2 : Integer.parseInt(numNominal));

        String numNumeric = Utils.getOption("numeric", options);
        setNumNumeric(numNumeric.length() == 0 ? 1 : Integer.parseInt(numNumeric));

        String numString = Utils.getOption("string", options);
        setNumString(numString.length() == 0 ? 1 : Integer.parseInt(numString));

        String numDate = Utils.getOption("date", options);
        setNumDate(numDate.length() == 0 ? 1 : Integer.parseInt(numDate));

        String numRelational = Utils.getOption("relational", options);
        setNumRelational(numRelational.length() == 0 ? 1 : Integer.parseInt(numRelational));

        String numInstancesRelational = Utils.getOption("num-instances-relational", options);
        setNumInstancesRelational(
            numInstancesRelational.length() == 0 ? 10 : Integer.parseInt(numInstancesRelational));

        String words = Utils.getOption("words", options);
        if (words.length() == 0) {
            setWords(new TestInstances().getWords());
        } else {
            setWords(words);
        }

        // The flag's presence is tested (rather than the value's length) so
        // that an explicitly supplied empty separator string is honoured.
        if (Utils.getOptionPos("word-separators", options) < 0) {
            setWordSeparators(TestInstances.DEFAULT_SEPARATORS);
        } else {
            setWordSeparators(Utils.getOption("word-separators", options));
        }
    }

    /**
     * Gets the current settings of this scheme checker: the superclass options
     * followed by every option declared in this class, so that the returned
     * array can be fed back into {@link #setOptions(String[])} without losing
     * any setting.
     *
     * @return an array of strings suitable for passing to setOptions
     */
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();

        // inherited options come first
        for (String option : super.getOptions()) {
            result.add(option);
        }

        result.add("-N");
        result.add(String.valueOf(getNumInstances()));

        result.add("-nominal");
        result.add(String.valueOf(getNumNominal()));

        result.add("-numeric");
        result.add(String.valueOf(getNumNumeric()));

        result.add("-string");
        result.add(String.valueOf(getNumString()));

        result.add("-date");
        result.add(String.valueOf(getNumDate()));

        result.add("-relational");
        result.add(String.valueOf(getNumRelational()));

        // Previously omitted: without this pair a getOptions()/setOptions()
        // round trip reset the value to its default of 10.
        result.add("-num-instances-relational");
        result.add(String.valueOf(getNumInstancesRelational()));

        result.add("-words");
        result.add(String.valueOf(getWords()));

        result.add("-word-separators");
        result.add(String.valueOf(getWordSeparators()));

        return result.toArray(new String[result.size()]);
    }

    /**
     * Installs the PostProcessor that {@link #process(Instances)} delegates
     * to. Passing null removes any previously installed one.
     *
     * @param value the new PostProcessor
     * @see #m_PostProcessor
     */
    public void setPostProcessor(PostProcessor value) {
        this.m_PostProcessor = value;
    }

    /**
     * Gives access to the PostProcessor currently in use.
     *
     * @return the current PostProcessor, or null if none has been set
     */
    public PostProcessor getPostProcessor() {
        return this.m_PostProcessor;
    }

    /**
     * Reports the state of the classpath-problem flag. Nothing in this class
     * changes the flag from its initial value of false; presumably derived
     * classes set it when a scheme fails with a "not in classpath" exception
     * -- TODO confirm.
     *
     * @return true if CLASSPATH problems occurred
     */
    public boolean hasClasspathProblems() {
        return this.m_ClasspathProblems;
    }

    /**
     * Begin the tests, reporting results to System.out. Abstract: every
     * concrete scheme checker has to supply its own test sequence.
     */
    public abstract void doTests();

    /**
     * Defines how many instances the datasets contain (some classifiers might
     * need more instances than the default). The value is stored as given,
     * without validation.
     *
     * @param value the number of instances to use
     */
    public void setNumInstances(int value) {
        this.m_NumInstances = value;
    }

    /**
     * Tells how many instances are currently configured for the datasets.
     *
     * @return the number of instances
     */
    public int getNumInstances() {
        return this.m_NumInstances;
    }

    /**
     * Defines the number of nominal attributes. The value is stored as given,
     * without validation.
     *
     * @param value the number of nominal attributes
     */
    public void setNumNominal(int value) {
        this.m_NumNominal = value;
    }

    /**
     * Tells how many nominal attributes are currently configured.
     *
     * @return the number of nominal attributes
     */
    public int getNumNominal() {
        return this.m_NumNominal;
    }

    /**
     * Defines the number of numeric attributes. The value is stored as given,
     * without validation.
     *
     * @param value the number of numeric attributes
     */
    public void setNumNumeric(int value) {
        this.m_NumNumeric = value;
    }

    /**
     * Tells how many numeric attributes are currently configured.
     *
     * @return the number of numeric attributes
     */
    public int getNumNumeric() {
        return this.m_NumNumeric;
    }

    /**
     * Defines the number of string attributes. The value is stored as given,
     * without validation.
     *
     * @param value the number of string attributes
     */
    public void setNumString(int value) {
        this.m_NumString = value;
    }

    /**
     * Tells how many string attributes are currently configured.
     *
     * @return the number of string attributes
     */
    public int getNumString() {
        return this.m_NumString;
    }

    /**
     * Defines the number of date attributes. The value is stored as given,
     * without validation.
     *
     * @param value the number of date attributes
     */
    public void setNumDate(int value) {
        this.m_NumDate = value;
    }

    /**
     * Tells how many date attributes are currently configured.
     *
     * @return the number of date attributes
     */
    public int getNumDate() {
        return this.m_NumDate;
    }

    /**
     * Defines the number of relational attributes. The value is stored as
     * given, without validation.
     *
     * @param value the number of relational attributes
     */
    public void setNumRelational(int value) {
        this.m_NumRelational = value;
    }

    /**
     * Tells how many relational attributes are currently configured.
     *
     * @return the number of relational attributes
     */
    public int getNumRelational() {
        return this.m_NumRelational;
    }

    /**
     * Defines how many instances are produced inside relational/bag
     * attributes. The value is stored as given, without validation.
     *
     * @param value the number of instances
     */
    public void setNumInstancesRelational(int value) {
        this.m_NumInstancesRelational = value;
    }

    /**
     * Tells how many instances are currently configured for relational/bag
     * attributes.
     *
     * @return the number of instances
     */
    public int getNumInstancesRelational() {
        return this.m_NumInstancesRelational;
    }

    /**
     * Splits a comma-separated list into its elements. Splitting follows
     * {@link StringTokenizer} semantics: empty elements (leading, trailing or
     * consecutive commas) are dropped, and elements are not trimmed.
     *
     * @param value the list to process
     * @return the list as array, empty if the list holds no elements
     */
    protected static String[] listToArray(String value) {
        StringTokenizer tokenizer = new StringTokenizer(value, ",");

        // countTokens() reports exactly how many nextToken() calls will succeed
        String[] elements = new String[tokenizer.countTokens()];
        for (int pos = 0; pos < elements.length; pos++) {
            elements[pos] = tokenizer.nextToken();
        }

        return elements;
    }

    /**
     * Joins the elements of an array into a comma-separated list, without any
     * whitespace around the commas. A null element is rendered as the text
     * "null".
     *
     * @param value the array to process
     * @return the array as list, the empty string for an empty array
     */
    protected static String arrayToList(String[] value) {
        StringBuilder joined = new StringBuilder();

        for (int pos = 0; pos < value.length; pos++) {
            // a separator goes in front of every element except the first
            if (pos != 0) {
                joined.append(",");
            }
            joined.append(value[pos]);
        }

        return joined.toString();
    }

    /**
     * Maps an attribute type constant of {@link Attribute} to a readable
     * name.
     *
     * @param type the attribute type to get a string representation for
     * @return the string representation, "???" for an unrecognised type
     */
    public static String attributeTypeToString(int type) {
        switch (type) {
        case Attribute.NUMERIC:
            return "numeric";
        case Attribute.NOMINAL:
            return "nominal";
        case Attribute.STRING:
            return "string";
        case Attribute.DATE:
            return "date";
        case Attribute.RELATIONAL:
            return "relational";
        default:
            // unknown type constant
            return "???";
        }
    }

    /**
     * Sets the comma-separated list of words to use for generating strings. The
     * list must contain at least 2 words, otherwise an exception will be thrown
     * and the previously configured words stay in place.
     *
     * @param value the list of words
     * @throws IllegalArgumentException if not at least 2 words are provided
     */
    public void setWords(String value) {
        // tokenise once and reuse the result for both validation and storage
        String[] words = listToArray(value);
        if (words.length < 2) {
            throw new IllegalArgumentException("At least 2 words must be provided!");
        }

        m_Words = words;
    }

    /**
     * Gives the words used for assembling strings, joined into a single
     * comma-separated list.
     *
     * @return the words as comma-separated list
     */
    public String getWords() {
        String joined = arrayToList(this.m_Words);
        return joined;
    }

    /**
     * Defines the word separators (chars) used for assembling strings. The
     * value is stored as given, without validation.
     *
     * @param value the characters to use as separators
     */
    public void setWordSeparators(String value) {
        this.m_WordSeparators = value;
    }

    /**
     * Gives the word separators (chars) used for assembling strings.
     *
     * @return the current separators
     */
    public String getWordSeparators() {
        return this.m_WordSeparators;
    }

    /**
     * Compare two datasets to see if they differ. Checked in this order:
     * headers, number of instances, and then per instance the attribute values
     * (a missing value only matches another missing value) followed by the
     * instance weight.
     *
     * @param data1 one set of instances
     * @param data2 the other set of instances
     * @throws Exception if the datasets differ
     */
    protected void compareDatasets(Instances data1, Instances data2) throws Exception {

        if (!data2.equalHeaders(data1)) {
            throw new Exception("header has been modified\n" + data2.equalHeadersMsg(data1));
        }
        if (data2.numInstances() != data1.numInstances()) {
            throw new Exception("number of instances has changed");
        }
        for (int i = 0; i < data2.numInstances(); i++) {
            Instance orig = data1.instance(i);
            Instance copy = data2.instance(i);
            for (int j = 0; j < orig.numAttributes(); j++) {
                if (orig.isMissing(j)) {
                    if (!copy.isMissing(j)) {
                        throw new Exception("instances have changed");
                    }
                } else if (orig.value(j) != copy.value(j)) {
                    throw new Exception("instances have changed");
                }
            }
            // The weight belongs to the instance, not to an attribute: compare
            // it once per instance, and also when there are no attributes.
            if (orig.weight() != copy.weight()) {
                throw new Exception("instance weights have changed");
            }
        }
    }

    /**
     * Add missing values to a dataset, in place. A fixed random seed is used,
     * so the same call on the same data always blanks the same cells.
     *
     * @param data             the instances to add missing values to
     * @param level            the level of missing values to add: each eligible
     *                         value is set to missing when a random integer
     *                         drawn from 0..99 is smaller than this number, so
     *                         a positive level acts as a percentage. With a
     *                         level of zero or less nothing is changed (the
     *                         "all but one value missing" behaviour for
     *                         negative levels is not yet implemented).
     * @param predictorMissing if true, predictor attributes will be modified
     * @param classMissing     if true, the class attribute will be modified
     */
    protected void addMissing(Instances data, int level, boolean predictorMissing, boolean classMissing) {

        int clsIdx = data.classIndex();
        Random rng = new Random(1);

        for (int row = 0; row < data.numInstances(); row++) {
            Instance inst = data.instance(row);
            for (int col = 0; col < data.numAttributes(); col++) {
                // the class column is governed by classMissing, all others by
                // predictorMissing
                boolean eligible = (col == clsIdx) ? classMissing : predictorMissing;
                if (!eligible) {
                    continue;
                }
                // a random number is drawn only for eligible cells
                if (rng.nextInt(100) < level) {
                    inst.setMissing(col);
                }
            }
        }
    }

    /**
     * Runs the data through the configured PostProcessor, if there is one;
     * without a PostProcessor the data is handed back as is.
     *
     * @param data the data to process
     * @return the processed data
     * @see #m_PostProcessor
     */
    protected Instances process(Instances data) {
        return (getPostProcessor() == null) ? data : getPostProcessor().process(data);
    }
}
